rm(list=ls(all=TRUE))
pacman::p_load(vcd, magrittr, readr, caTools, ggplot2, dplyr, plotly,tidyverse,ggrepel)
load("data/tf0.rdata")
load("data/tf3.rdata")
load("data/tf4.rdata")
load("data/tf5.rdata")#了解資料數量(顧客數、商品數...)
sapply(list(cust=A0,tid=X0,items=Z0), nrow)## cust tid items
## 32241 119328 817182
#4個月內來店次數
A0 <- A0 %>% group_by(cust) %>% mutate(Freq = sum(f))
#讓分佈圖看起來更清楚
A1 <- A0 %>% filter(Freq < 31) A1$Freq <- as.factor(A1$Freq)
pie <- ggplot(A1,
aes(x = factor(1), fill = Freq)) +
geom_bar(width = 1) #將資料用成百分比
pie <- pie + coord_polar(theta = "y")
pie <- pie + theme_void()#將數值去掉
pie
透過圓餅圖了解顧客這四個月到店的消費次數,發現超過一半的人只來1.2次而已
m = A0$m > 993; f = A0$f >= 3; a = A0$area %in% c("z115","z221")
Status2 = case_when(
m & f ~ "A", #購買金額高次數高
!m & !f ~ "D", #購買金額低次數低
!m & f & !a ~ "E", #購買金額低次數高不住附近
!m & f & a ~ "F", #購買金額低次數高住附近
m & !f & !a ~ "B", #購買金額高次數低不住附近
m & !f & a ~ "C" #購買金額高次數低住附近
)
Status3 = case_when(
m & !f ~ "a1", #潛力暴發戶
!m & f ~ "a2") #忠心小犬
table(Status2)## Status2
## A B C D E F
## 4431 3808 3160 11223 2417 7202
table(Status3)## Status3
## a1 a2
## 6968 9619
#把分群納入資料框中
A0<-cbind(A0,Status2)## New names:
## * NA -> ...11
names(A0)[11] <- "status"
#將資料合併
B0 = X0[,c("cust","date")]
AB <- left_join(A0,B0)## Joining, by = "cust"
#把日期轉換成月份
AB$month <- format(as.Date(AB$date),format="%Y%m")
g2 = ggplot(AB,aes(x = month, fill = status)) +
geom_bar()
ggplotly(g2)#把A與Z合併
AZ <- left_join(A0,Z0)## Joining, by = c("cust", "age", "area")
CustSegments = AB %>%
group_by(month,status) %>% summarise(
average_frequency = mean(f),
average_amount = mean(m),
total_revenue = sum(rev),
total_no_orders = sum(Freq),
average_recency = mean(r),
average_seniority = mean(s),
group_size = n())## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
df = CustSegments %>% transmute(
`群組` = as.character(status),
`month` = month,
`平均購買次數` = average_frequency,
`平均客單價` = average_amount,
`總營收貢獻` = total_revenue
)ggplot(df, aes(
x=`平均購買次數`,y=`平均客單價`,color=`群組`,group=`群組`,ids=month)) +
geom_point(aes(size=`總營收貢獻`),alpha=0.8) +
scale_size(range=c(2,12)) -> g
ggplotly(g)
透些資料探索我們可以發現一些趨勢和需改善的問題,
我們在分群後,將主要行銷對象設定為忠心小犬(E&F)及潛力暴發戶(B&C),
提高此2群顧客的消費頻率及平均客單價,為大豐創造更多利潤
feb01 = as.Date("2001-02-01")
Z = subset(Z0, date < feb01) # 618212#依據分析對象彙整資料
X = group_by(Z, tid) %>% summarise(
date = first(date), # 交易日期
cust = first(cust), # 顧客 ID
age = first(age), # 顧客 年齡級別
area = first(area), # 顧客 居住區別
items = n(), # 交易項目(總)數
pieces = sum(qty), # 產品(總)件數
total = sum(price), # 交易(總)金額
gross = sum(price - cost) # 毛利
) %>% data.frame # 88387sapply(X[,6:9], quantile, prob=c(.999, .9995, .9999))## items pieces total gross
## 99.9% 56.0000 84.0000 9378.684 1883.228
## 99.95% 64.0000 98.0000 11261.751 2317.087
## 99.99% 85.6456 137.6456 17699.325 3389.646
X = subset(X, items<=64 & pieces<=98 & total<=11260) # 88387 -> 88295d0 = max(X$date) + 1
A = X %>% mutate(
days = as.integer(difftime(d0, date, units="days"))
) %>%
group_by(cust) %>% summarise(
r = min(days), # recency
s = max(days), # seniority
f = n(), # frquency
m = mean(total), # monetary
rev = sum(total), # total revenue contribution
raw = sum(gross), # total gross profit contribution
age = age[1], # age group
area = area[1], # area code
) %>% data.frame # 28584
nrow(A)## [1] 28584
feb = filter(X0, date>= feb01) %>% group_by(cust) %>%
summarise(amount = sum(total)) # 16900A$amountA = merge(A, feb, by="cust", all.x=T)A$buyA$buy = !is.na(A$amount)
table(A$buy, !is.na(A$amount))##
## FALSE TRUE
## FALSE 15342 0
## TRUE 0 13242
summary(A)## cust r s f
## Length:28584 Min. : 1.00 Min. : 1.00 Min. : 1.000
## Class :character 1st Qu.:11.00 1st Qu.:47.00 1st Qu.: 1.000
## Mode :character Median :21.00 Median :68.00 Median : 2.000
## Mean :32.12 Mean :61.27 Mean : 3.089
## 3rd Qu.:53.00 3rd Qu.:83.00 3rd Qu.: 4.000
## Max. :92.00 Max. :92.00 Max. :60.000
##
## m rev raw age
## Min. : 8.0 Min. : 8 Min. : -742.0 Length:28584
## 1st Qu.: 359.4 1st Qu.: 638 1st Qu.: 70.0 Class :character
## Median : 709.5 Median : 1566 Median : 218.0 Mode :character
## Mean : 1012.4 Mean : 2711 Mean : 420.8
## 3rd Qu.: 1315.0 3rd Qu.: 3426 3rd Qu.: 535.0
## Max. :10634.0 Max. :99597 Max. :15565.0
##
## area amount buy
## Length:28584 Min. : 8 Mode :logical
## Class :character 1st Qu.: 454 FALSE:15342
## Mode :character Median : 993 TRUE :13242
## Mean : 1499
## 3rd Qu.: 1955
## Max. :28089
## NA's :15342
LoyalDog <-A %>% filter(m<993 & f>=3) #購買金額低 購買次數多 忠心小犬
Upstart <-A %>% filter(m>993 & f<3) #購買金額高 購買次數低 潛力暴發戶LoyalDogX = subset(X, cust %in% LoyalDog$cust & date < as.Date("2001-02-01"))
LoyalDogZ = subset(Z, cust %in% LoyalDog$cust & date < as.Date("2001-02-01"))
set.seed(2018); LoyalDogspl = sample.split(LoyalDog$buy, SplitRatio=0.7)#sample.split回傳分割向量 得出train跟test
c(nrow(LoyalDog), sum(LoyalDogspl), sum(!LoyalDogspl))## [1] 7366 5156 2210
cbind(LoyalDog, LoyalDogspl) %>% filter(buy) %>%
ggplot(aes(x=log(amount))) + geom_density(aes(fill=LoyalDogspl), alpha=0.5)
檢查忠心小犬在購買機率的分佈情形,從圖型可看出兩者間存在些許差異
LoyalDog2 = subset(LoyalDog, buy) %>% mutate_at(c("m","rev","amount"), log10)
n = nrow(LoyalDog2)
set.seed(2018); LoyalDogspl2 = 1:n %in% sample(1:n, round(0.7*n))
c(nrow(LoyalDog2), sum(LoyalDogspl2), sum(!LoyalDogspl2))## [1] 5430 3801 1629
cbind(LoyalDog2, LoyalDogspl2) %>%
ggplot(aes(x=amount)) + geom_density(aes(fill=LoyalDogspl2), alpha=0.5)
檢查忠心小犬在購買機率的分佈情形,從圖型可看出兩者間存在些許差異
UpstartX = subset(X, cust %in% Upstart$cust & date < as.Date("2001-02-01"))
UpstartZ = subset(Z, cust %in% Upstart$cust & date < as.Date("2001-02-01"))
set.seed(2018); Upstartspl = sample.split(Upstart$buy, SplitRatio=0.7)
#sample.split回傳分割向量 得出train跟test
c(nrow(Upstart), sum(Upstartspl), sum(!Upstartspl))## [1] 6986 4890 2096
cbind(Upstart, Upstartspl) %>% filter(buy) %>%
ggplot(aes(x=log(amount))) + geom_density(aes(fill=Upstartspl), alpha=0.5)
檢查潛在暴發戶在購買機率的分佈情形,從圖型可看出兩者間存在些許差異
Upstart2 = subset(Upstart, buy) %>% mutate_at(c("m","rev","amount"), log10)
n = nrow(Upstart2)
set.seed(2018); Upstartspl2 = 1:n %in% sample(1:n, round(0.7*n))
c(nrow(Upstart2), sum(Upstartspl2), sum(!Upstartspl2))## [1] 2115 1480 635
cbind(Upstart2, Upstartspl2) %>%
ggplot(aes(x=amount)) + geom_density(aes(fill=Upstartspl2), alpha=0.5)
檢查潛在暴發戶在購買機率的分佈情形,從圖型可看出兩者間存在些許差異
LoyalDogTR = subset(LoyalDog, LoyalDogspl)
LoyalDogTS = subset(LoyalDog, !LoyalDogspl)LoyalDogglm1 = glm(buy ~ ., LoyalDogTR[,c(2:9, 11)], family=binomial())
summary(LoyalDogglm1)##
## Call:
## glm(formula = buy ~ ., family = binomial(), data = LoyalDogTR[,
## c(2:9, 11)])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3539 -1.0687 0.5654 0.8392 1.6185
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.041e-01 3.661e-01 -1.377 0.168560
## r -1.824e-02 2.071e-03 -8.808 < 2e-16 ***
## s 8.277e-03 2.269e-03 3.648 0.000264 ***
## f 1.641e-01 3.735e-02 4.393 1.12e-05 ***
## m -7.993e-04 3.373e-04 -2.370 0.017783 *
## rev 1.952e-04 7.982e-05 2.446 0.014462 *
## raw -4.185e-04 2.180e-04 -1.920 0.054880 .
## agea29 3.454e-01 1.735e-01 1.991 0.046431 *
## agea34 2.349e-01 1.600e-01 1.468 0.142061
## agea39 3.301e-01 1.568e-01 2.106 0.035227 *
## agea44 3.220e-01 1.621e-01 1.987 0.046906 *
## agea49 3.504e-01 1.664e-01 2.105 0.035259 *
## agea54 4.108e-02 1.785e-01 0.230 0.818019
## agea59 1.292e-01 2.193e-01 0.589 0.555668
## agea64 2.586e-01 2.253e-01 1.148 0.251034
## agea69 3.749e-01 1.886e-01 1.988 0.046866 *
## agea99 -1.329e-02 2.874e-01 -0.046 0.963131
## areaz106 1.668e-01 3.665e-01 0.455 0.649046
## areaz110 -4.612e-02 2.935e-01 -0.157 0.875146
## areaz114 -8.860e-02 3.059e-01 -0.290 0.772125
## areaz115 2.629e-01 2.651e-01 0.992 0.321387
## areaz221 1.821e-01 2.670e-01 0.682 0.495214
## areazOthers 2.534e-01 2.890e-01 0.877 0.380669
## areazUnknown -2.210e-01 3.021e-01 -0.731 0.464510
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5939.4 on 5155 degrees of freedom
## Residual deviance: 5252.2 on 5132 degrees of freedom
## AIC: 5300.2
##
## Number of Fisher Scoring iterations: 6
LoyalDogpred = predict(LoyalDogglm1, LoyalDogTS, type="response")
LoyalDogcm = table(actual = LoyalDogTS$buy, predict = LoyalDogpred > 0.5); LoyalDogcm## predict
## actual FALSE TRUE
## FALSE 72 509
## TRUE 74 1555
LoyalDogacc.ts = LoyalDogcm %>% {sum(diag(.))/sum(.)}
c(1-mean(LoyalDogTS$buy) , LoyalDogacc.ts) ## [1] 0.2628959 0.7361991
用模型後,正確率從0.26提升到0.73
colAUC(LoyalDogpred, LoyalDogTS$buy) # 0.6940807## [,1]
## FALSE vs. TRUE 0.6940807
LoyalDog2 = subset(LoyalDog, LoyalDog$buy) %>% mutate_at(c("m","rev","amount"), log10)
LoyalDogTR2 = subset(LoyalDog2, LoyalDogspl2)
LoyalDogTS2 = subset(LoyalDog2, !LoyalDogspl2)LoyalDoglm1 = lm(amount ~ ., LoyalDogTR2[,c(2:6,8:10)])
summary(LoyalDoglm1)##
## Call:
## lm(formula = amount ~ ., data = LoyalDogTR2[, c(2:6, 8:10)])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.88216 -0.20627 0.04979 0.26805 1.28653
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.579e-01 1.065e-01 6.179 7.14e-10 ***
## r -5.668e-05 5.211e-04 -0.109 0.913381
## s -4.758e-04 5.375e-04 -0.885 0.376087
## f 2.076e-02 2.850e-03 7.286 3.88e-13 ***
## m 5.485e-01 7.562e-02 7.254 4.88e-13 ***
## rev 1.672e-01 7.058e-02 2.369 0.017892 *
## agea29 6.640e-02 3.649e-02 1.820 0.068912 .
## agea34 8.172e-02 3.381e-02 2.417 0.015678 *
## agea39 1.242e-01 3.291e-02 3.774 0.000163 ***
## agea44 1.087e-01 3.357e-02 3.238 0.001212 **
## agea49 8.509e-02 3.460e-02 2.459 0.013981 *
## agea54 4.283e-02 3.760e-02 1.139 0.254799
## agea59 1.509e-02 4.490e-02 0.336 0.736851
## agea64 -3.931e-03 4.609e-02 -0.085 0.932037
## agea69 -3.131e-02 3.883e-02 -0.806 0.420165
## agea99 8.588e-02 5.725e-02 1.500 0.133700
## areaz106 3.940e-02 8.821e-02 0.447 0.655118
## areaz110 2.933e-02 7.420e-02 0.395 0.692627
## areaz114 4.295e-02 7.791e-02 0.551 0.581452
## areaz115 5.354e-02 6.755e-02 0.793 0.428055
## areaz221 5.556e-02 6.789e-02 0.818 0.413227
## areazOthers 1.521e-02 7.234e-02 0.210 0.833522
## areazUnknown 6.502e-02 7.461e-02 0.871 0.383568
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4101 on 3778 degrees of freedom
## Multiple R-squared: 0.2944, Adjusted R-squared: 0.2903
## F-statistic: 71.66 on 22 and 3778 DF, p-value: < 2.2e-16
LoyalDogr2.tr = summary(LoyalDoglm1)$r.sq
LoyalDogSST = sum((LoyalDogTS2$amount - mean(LoyalDogTR2$amount))^ 2)
LoyalDogSSE = sum((predict(LoyalDoglm1, LoyalDogTS2) - LoyalDogTS2$amount)^2)
LoyalDogr2.ts = 1 - (LoyalDogSSE/LoyalDogSST)
c(LoyalDogR2train=LoyalDogr2.tr, LoyalDogR2test=LoyalDogr2.ts)## LoyalDogR2train LoyalDogR2test
## 0.2944228 0.2814886
UpstartTR = subset(Upstart, Upstartspl)
UpstartTS = subset(Upstart, !Upstartspl)Upstartglm1 = glm(buy ~ ., UpstartTR[,c(2:9, 11)], family=binomial())
summary(Upstartglm1)##
## Call:
## glm(formula = buy ~ ., family = binomial(), data = UpstartTR[,
## c(2:9, 11)])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4070 -0.8118 -0.7207 1.2682 2.0709
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.272e+00 3.348e-01 -6.786 1.15e-11 ***
## r -4.551e-03 2.300e-03 -1.979 0.04787 *
## s 2.898e-03 2.334e-03 1.242 0.21429
## f 8.174e-01 1.585e-01 5.156 2.53e-07 ***
## m 8.995e-05 8.429e-05 1.067 0.28587
## rev -4.111e-05 6.870e-05 -0.598 0.54962
## raw -1.794e-04 1.521e-04 -1.179 0.23831
## agea29 1.049e-01 2.274e-01 0.461 0.64469
## agea34 1.934e-01 2.141e-01 0.903 0.36635
## agea39 2.741e-01 2.120e-01 1.293 0.19601
## agea44 2.516e-01 2.154e-01 1.168 0.24277
## agea49 3.478e-01 2.219e-01 1.568 0.11698
## agea54 1.328e-01 2.410e-01 0.551 0.58164
## agea59 4.141e-01 2.676e-01 1.547 0.12183
## agea64 2.866e-01 2.943e-01 0.974 0.33009
## agea69 7.772e-01 2.838e-01 2.738 0.00617 **
## agea99 6.528e-01 3.359e-01 1.943 0.05197 .
## areaz106 -4.957e-02 2.233e-01 -0.222 0.82432
## areaz110 -1.015e-01 1.763e-01 -0.576 0.56492
## areaz114 7.368e-02 1.908e-01 0.386 0.69932
## areaz115 2.392e-01 1.668e-01 1.434 0.15147
## areaz221 2.253e-01 1.670e-01 1.349 0.17740
## areazOthers 2.372e-02 1.772e-01 0.134 0.89350
## areazUnknown -5.492e-01 2.446e-01 -2.245 0.02478 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5996.1 on 4889 degrees of freedom
## Residual deviance: 5783.7 on 4866 degrees of freedom
## AIC: 5831.7
##
## Number of Fisher Scoring iterations: 4
Upstartpred = predict(Upstartglm1, UpstartTS, type="response")
Upstartcm = table(actual = UpstartTS$buy, predict = Upstartpred > 0.5); Upstartcm## predict
## actual FALSE TRUE
## FALSE 1439 22
## TRUE 615 20
Upstartacc.ts = Upstartcm %>% {sum(diag(.))/sum(.)}
c(1-mean(UpstartTS$buy) , Upstartacc.ts) # 用模型後的正確率## [1] 0.6970420 0.6960878
colAUC(Upstartpred, UpstartTS$buy) # 0.6173465## [,1]
## FALSE vs. TRUE 0.6173465
Upstart2 = subset(Upstart, Upstart$buy) %>% mutate_at(c("m","rev","amount"), log10)
UpstartTR2 = subset(Upstart2, Upstartspl2)
UpstartTS2 = subset(Upstart2, !Upstartspl2)Upstartlm1 = lm(amount ~ ., UpstartTR2[,c(2:5,8:10)])
summary(Upstartlm1)##
## Call:
## lm(formula = amount ~ ., data = UpstartTR2[, c(2:5, 8:10)])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5957 -0.2129 0.0606 0.2884 1.0700
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9568564 0.2101547 4.553 5.73e-06 ***
## r 0.0002358 0.0007470 0.316 0.7524
## s 0.0001052 0.0007586 0.139 0.8897
## f 0.0629430 0.0331501 1.899 0.0578 .
## m 0.6138312 0.0555493 11.050 < 2e-16 ***
## agea29 -0.0158818 0.0782064 -0.203 0.8391
## agea34 0.0230267 0.0718094 0.321 0.7485
## agea39 0.1058886 0.0714651 1.482 0.1386
## agea44 0.0812381 0.0727503 1.117 0.2643
## agea49 -0.0209388 0.0742414 -0.282 0.7780
## agea54 0.0087930 0.0816758 0.108 0.9143
## agea59 0.0738311 0.0899099 0.821 0.4117
## agea64 -0.0334659 0.0969835 -0.345 0.7301
## agea69 -0.0654049 0.0940655 -0.695 0.4870
## agea99 0.1521789 0.1166531 1.305 0.1923
## areaz106 -0.0154511 0.0749086 -0.206 0.8366
## areaz110 0.0075053 0.0596472 0.126 0.8999
## areaz114 -0.0612669 0.0638730 -0.959 0.3376
## areaz115 -0.0749400 0.0555279 -1.350 0.1774
## areaz221 -0.0334140 0.0554566 -0.603 0.5469
## areazOthers 0.0303755 0.0597953 0.508 0.6115
## areazUnknown -0.1239351 0.0863085 -1.436 0.1512
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4099 on 1458 degrees of freedom
## Multiple R-squared: 0.1083, Adjusted R-squared: 0.09542
## F-statistic: 8.429 on 21 and 1458 DF, p-value: < 2.2e-16
Upstartr2.tr = summary(Upstartlm1)$r.sq
UpstartSST = sum((UpstartTS2$amount - mean(UpstartTR2$amount))^ 2)
UpstartSSE = sum((predict(Upstartlm1, UpstartTS2) - UpstartTS2$amount)^2)
Upstartr2.ts = 1 - (UpstartSSE/UpstartSST)
c(UpstartR2train=Upstartr2.tr, UpstartR2test=Upstartr2.ts)## UpstartR2train UpstartR2test
## 0.10826081 0.03312782
資料日期為2000-12-01 ~ 2001~02-28
d0 = max(X0$date) + 1
B = X0 %>%
filter(date >= as.Date("2000-12-01")) %>%
mutate(days = as.integer(difftime(d0, date, units="days"))) %>%
group_by(cust) %>% summarise(
r = min(days), # recency
s = max(days), # seniority
f = n(), # frquency
m = mean(total), # monetary
rev = sum(total), # total revenue contribution
raw = sum(gross), # total gross profit contribution
age = age[1], # age group
area = area[1], # area code
) %>% data.frame # 28584
LoyalDogB <-B %>% filter(m<993 & f>=3) #購買金額低 購買次數多 忠心小犬LoyalDogB$Buy = predict(LoyalDogglm1, LoyalDogB, type="response")LoyalDogB2 = LoyalDogB %>% mutate_at(c("m","rev"), log10)
LoyalDogB$Rev = 10^predict(LoyalDoglm1, LoyalDogB2)對忠心小犬客群的預測購買金額做指數、對數轉換
par(mfrow=c(1,2), cex=0.8)
hist(LoyalDogB$Buy)
hist(log(LoyalDogB$Rev,10))
從上圖可發現,忠心小犬的購買機率落在0.7~0.9之間(該族群的購買機率確實偏高)
而在預期營收方面,呈現常態分佈,預期營收平均約落在899左右
UpstartB <-B %>% filter(m>993 & f<3) #購買金額高 購買次數低 潛力暴發戶
UpstartB$Buy = predict(Upstartglm1, UpstartB, type="response")UpstartB2 = UpstartB %>% mutate_at(c("m","rev"), log10)
UpstartB$Rev = 10^predict(Upstartlm1, UpstartB2)對忠心小犬客群的預測購買金額做指數、對數轉換
par(mfrow=c(1,2), cex=0.8)
hist(UpstartB$Buy)
hist(log(UpstartB$Rev,10))
從上圖可發現,潛力潛力暴發戶的購買機率較為分散,與忠心小犬相比其購買機率明顯較低
而在預期營收方面,呈現常態分佈,預期營收平均約落在1252左右
接著我們要做出具體的行銷方向,再透過假設進行市場模擬,
了解出這些行銷方案在忠心小犬與潛力暴發戶上到底有沒有效
DP = function(x,m0,b0,a0) {m0*plogis((10/a0)*(x-b0))}margin)marLoyalDog = sum(LoyalDogB$raw)/sum(LoyalDogB$rev)
marUpstart = sum(UpstartB$raw)/sum(UpstartB$rev)我們針對兩族群進行毛利率估計後,發現忠心小犬(loyaldog)為0.14;
潛力暴發戶(upstart)則為0.17
m=0.2; b=25; a=40; x=30
LoyalDogdp = pmin(1-LoyalDogB$Buy, DP(x,m,b,a))
LoyalDogeR = LoyalDogdp*LoyalDogB$Rev*marLoyalDog - x
hist(LoyalDogeR,main="預期報償分佈",xlab="預期報償",ylab="顧客數")
針對LoyalDog這個族群分析預期報償,發現約落在-30到10之間,
於是我們根據上圖所得的結果,針對預期報償大於0之顧客進行促銷
m=0.2; b=25; a=40; X = seq(10,45,1)
LoyalDogdf = sapply(X, function(x) {
LoyalDogdp = pmin(DP(x,m,b,a),1-LoyalDogB$Buy)
LoyalDogeR = LoyalDogdp*LoyalDogB$Rev*marLoyalDog - x
c(x=x, eReturn=sum(LoyalDogeR), N=sum(LoyalDogeR > 0), eReturn2=sum(LoyalDogeR[LoyalDogeR > 0]))
}) %>% t %>% data.frame
#eReturn對所有人行銷的總預期效益
#n 預期收益大於0的人數
#eReturn 只對收益大於0的人做行銷的總預期收益
LoyalDogdf %>% gather('key','value',-x) %>%
ggplot(aes(x=x, y=value, col=key)) +
geom_hline(yintercept=0,linetype='dashed') +
geom_line(size=1.5,alpha=0.5) +
facet_wrap(~key,ncol=1,scales='free_y') + theme_bw()
針對該族群的所有人進行行銷,結果會發現總體預期報償會呈現負數,
然而在進行調整過後,發現針對預期報償大於0之顧客的總體預期報償則明顯提高
mm=c(0.20, 0.25, 0.15, 0.25)
bb=c( 25, 30, 15, 30)
aa=c( 40, 40, 30, 60)
X = seq(10, 60, 1)
LoyalDogdf2 = do.call(rbind, lapply(1:length(mm), function(i) {
sapply(X, function(x) {
LoyalDogdp2 = pmin(1-LoyalDogB$Buy, DP(x,mm[i],bb[i],aa[i]))
LoyalDogeR2 = LoyalDogdp2*LoyalDogB$Rev*marLoyalDog - x
c(i=i, x=x, eR.ALL=sum(LoyalDogeR2), N=sum(LoyalDogeR2>0), eR.SEL=sum(LoyalDogeR2[LoyalDogeR2 > 0]) )
}) %>% t %>% data.frame
}))
LoyalDogdf2 %>%
mutate_at(vars(eR.ALL, eR.SEL), function(y) round(y/1000)) %>%
gather('key','value',-i,-x) %>%
mutate(Instrument = paste0('I',i)) %>%
ggplot(aes(x=x, y=value, col=Instrument)) +
geom_hline(yintercept=0, linetype='dashed', col='blue') +
geom_line(size=1.5,alpha=0.5) +
xlab('工具選項(成本)') + ylab('預期報償(K)') +
ggtitle('行銷工具優化','假設行銷工具的效果是其成本的函數') +
facet_wrap(~key,ncol=1,scales='free_y') + theme_bw() -> p
plotly::ggplotly(p)
從圖中可發現行銷工具中,I3的預期報償表現顯著突出
當工具成本20時,對所有人的預期效益會是-583310,
目標客群人數為791,然而僅針對部分顧客進行行銷時為2046
group_by(LoyalDogdf2, i) %>% top_n(1,eR.SEL)## # A tibble: 4 x 5
## # Groups: i [4]
## i x eR.ALL N eR.SEL
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 32 -124049. 109 218.
## 2 2 38 -145268. 191 472.
## 3 3 20 -58331. 791 2047.
## 4 4 40 -165002. 48 104.
從工具的最佳參數中,可看出I3的效果最好,
因此對Loyaldog這個族群,我們以I3作為行銷工具。
m=0.2; b=25; a=40; x=30
Upstartdp = pmin(1-UpstartB$Buy, DP(x,m,b,a))
UpstarteR = Upstartdp*UpstartB$Rev*marUpstart - x
hist(UpstarteR,main="預期報償分佈",xlab="預期報償",ylab="顧客數")
針對Upstart這個族群分析預期報償,發現約落在-20到80之間,
於是我們根據上圖所得的結果,針對預期報償大於0之顧客進行促銷
m=0.2; b=25; a=40; X = seq(10,45,1)
Upstartdf = sapply(X, function(x) {
Upstartdp = pmin(DP(x,m,b,a),1-UpstartB$Buy)
UpstarteR = Upstartdp*UpstartB$Rev*marUpstart - x
c(x=x, eReturn=sum(UpstarteR), N=sum(UpstarteR > 0), eReturn2=sum(UpstarteR[UpstarteR > 0]))
}) %>% t %>% data.frame
#eReturn對所有人行銷的總預期效益
#n 預期收益大於0的人數
#eReturn 只對收益大於0的人做行銷的總預期收益
Upstartdf %>% gather('key','value',-x) %>%
ggplot(aes(x=x, y=value, col=key)) +
geom_hline(yintercept=0,linetype='dashed') +
geom_line(size=1.5,alpha=0.5) +
facet_wrap(~key,ncol=1,scales='free_y') + theme_bw()
針對該族群的所有人進行行銷,結果會發現總體預期報償的區間大於忠心小犬的區間,
然而在進行調整過後,發現針對預期報償大於0之顧客的總體預期報償則明顯提高
X = seq(10, 60, 1)
Upstartdf2 = do.call(rbind, lapply(1:length(mm), function(i) {
sapply(X, function(x) {
Upstartdp2 = pmin(1-UpstartB$Buy, DP(x,mm[i],bb[i],aa[i]))
UpstarteR2 = Upstartdp2*UpstartB$Rev*marUpstart - x
c(i=i, x=x, eR.ALL=sum(UpstarteR2), N=sum(UpstarteR2>0), eR.SEL=sum(UpstarteR2[UpstarteR2 > 0]) )
}) %>% t %>% data.frame
}))
Upstartdf2 %>%
mutate_at(vars(eR.ALL, eR.SEL), function(y) round(y/1000)) %>%
gather('key','value',-i,-x) %>%
mutate(Instrument = paste0('I',i)) %>%
ggplot(aes(x=x, y=value, col=Instrument)) +
geom_hline(yintercept=0, linetype='dashed', col='blue') +
geom_line(size=1.5,alpha=0.5) +
xlab('工具選項(成本)') + ylab('預期報償(K)') +
ggtitle('行銷工具優化','假設行銷工具的效果是其成本的函數') +
facet_wrap(~key,ncol=1,scales='free_y') + theme_bw() -> p
#er.all所有人都做
#er.sel er>0的人中 對他們做行銷的er
#er.n er>0的人數
plotly::ggplotly(p)group_by(Upstartdf2, i) %>% top_n(1,eR.SEL)## # A tibble: 4 x 5
## # Groups: i [4]
## i x eR.ALL N eR.SEL
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 35 29692. 3609 50655.
## 2 2 40 62608. 4376 78269.
## 3 3 22 48437. 4920 54118.
## 4 4 43 32467. 3512 59605.
從工具的最佳參數中,可看出I2的效果最好,
因此對upstart這個族群,我們以I2作為行銷工具
save(LoyalDogZ, LoyalDogX, LoyalDog, LoyalDog2, LoyalDogspl, LoyalDogspl2,UpstartZ, UpstartX, Upstart, Upstart2, Upstartspl, Upstartspl2, file="data/tf3.rdata")save(LoyalDogB, UpstartB, file='data/tf4.rdata')save(AB, AZ, Status2, file="data/tf5.rdata")